package com.xiaojiezhu.spark.rdd.rddcreate;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

/**
 * @Author 朱小杰
 * 时间 2017-09-24 .10:04
 * 说明 java版创建rdd
 */
public class JavaRddCreate {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("App Name");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<String> lines = sc.textFile("G:\\javacode\\workspace\\spark\\spark-rdd\\src\\main\\java\\com\\xiaojiezhu\\spark\\rdd\\rddcreate\\package-info.java");
        //过滤...开始
        JavaRDD<String> java = lines.filter(new Function<String, Boolean>() {
            @Override
            public Boolean call(String line) throws Exception {
                return line.contains("java");
            }
        });
        JavaRDD<String> rdd = lines.filter(str -> str.contains("spark"));
        System.out.println(rdd.count());
        //过滤 ...结束

        //指定已有集合为rdd...开始
        JavaRDD<String> stringJavaRDD = sc.parallelize(Arrays.asList("helo wor", "dajiahao n", "a b a"));
        //指定已有集合为rdd...结束

        JavaRDD<String> rdds = sc.parallelize(Arrays.asList("error:1", "error:2", "warn:3", "warn:4", "info:1", "info:2"));
        JavaRDD<String> warnRdd = rdds.filter(line -> line.contains("warn"));
        JavaRDD<String> errorRdd = rdds.filter(line -> line.contains("error"));
        JavaRDD<String> union = warnRdd.union(errorRdd);//把两个rdd合并在一起


        List<String> take = union.take(1);//take可以拿到前面几个的集合
        System.out.println(take);

        JavaRDD<String> map = union.map(new Function<String, String>() {
            @Override
            public String call(String v1) throws Exception {
                return v1 + "======";
            }
        });
        System.out.println(map.collect().toString());


        JavaRDD<Integer> rddd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
        JavaRDD<Integer> sample = rddd.sample(false, 0.5);
        System.out.println(sample.collect().toString());

    }
}
